Read in the data

We loop trough the folders containing the filtered matrix files, barcodes and genenames to read in the data in form of SingleCellExperiment (sce) objects.

library(scran)
library(scater)
library(DropletUtils)
library(ggplot2)
library(Matrix)
library(cowplot)
library(openxlsx)
MT_genes <- read.table("~/Dropbox/Postdoc/2019-12-29_BE2020/Additional_files/MT_genes.txt", sep = "\t", header = TRUE)
folders <- list.files("~/Dropbox/Postdoc/2019-12-29_BE2020/Data_v3/", full.names = TRUE)
folder.names <- list.files("~/Dropbox/Postdoc/2019-12-29_BE2020/Data_v3/", full.names = FALSE)

# Initialization of list to store sce objects
# Uncommand this when no other list exists
sce.list <- list()
# If other samples have already been analysed, load in list containing previous samples
# sce.list <- readRDS("~/Dropbox/Postdoc/2019-10-29_Gastric_IM/All_sce.rds")

# Read in the data
for(i in 1:length(folders)){
  if(folder.names[i] %in% names(sce.list)){next}
  else{
    cur_sce <- read10xCounts(paste0(folders[i],"/outs/filtered_feature_bc_matrix/"))
    colData(cur_sce)$Patient <- as.character(sapply(folder.names, 
                                         function(n){unlist(strsplit(n, "_"))[1]})[i])
    colData(cur_sce)$Tissue <- as.character(sapply(folder.names, 
                                        function(n){unlist(strsplit(n, "_"))[3]})[i])
    sce.list[[folder.names[i]]] <- cur_sce 
  }
}

# Save list
saveRDS(sce.list, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_unfiltered.rds")

Plot QC features

Next we filter the cells based on several criteria. For this, we will plot these QC features first.

#read in the file if running only this chunk
##sce.list <- readRDS("~/Dropbox/Postdoc/2019-10-29_Gastric_IM///All_sce_unfiltered.rds")
for(i in 1:length(sce.list)){
  cur_sce <- sce.list[[i]]
  if(!("total_features_by_counts" %in% names(colData(cur_sce)))){
    cur_sce <- suppressMessages(calculateQCMetrics(cur_sce))
    sce.list[[i]] <- cur_sce
  }
  
  # Library size
  print(ggplot(as.data.frame(colData(cur_sce))) + 
    geom_point(aes(1:ncol(cur_sce), log10(total_counts))) +
    xlab("Cells")+ggtitle(names(sce.list)[i]))
  
  # Number of genes detected
  print(ggplot(as.data.frame(colData(cur_sce))) + 
    geom_point(aes(1:ncol(cur_sce), total_features_by_counts)) +
    xlab("Cells")+ggtitle(names(sce.list)[i]))
  
  # Mitochondrial genes
  print(ggplot(data.frame(MT_genes = Matrix::colSums(counts(cur_sce)[rownames(cur_sce) %in%
                                                           MT_genes$Gene.stable.ID,])/
        Matrix::colSums(counts(cur_sce)))*100) +
    geom_point(aes(1:ncol(cur_sce), MT_genes)) + 
     ylab("% mitochondrial reads") + xlab("Cells")+ggtitle(names(sce.list)[i]))
  
  # Marker gene expression
  # VIM
  print(ggplot(data.frame(VIM = log10(counts(cur_sce)["ENSG00000026025",] + 1))) +
    geom_point(aes(1:ncol(cur_sce), VIM)) + xlab("Cells") +
    ylab("log10[VIM]")+ggtitle(names(sce.list)[i]))

  # PTPRC
  print(ggplot(data.frame(PTPRC = log10(counts(cur_sce)["ENSG00000081237",] + 1))) +
    geom_point(aes(1:ncol(cur_sce), PTPRC)) + xlab("Cells") +
    ylab("log10[PTPRC]")+ggtitle(names(sce.list)[i]))
}
## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

## Warning: 'calculateQCMetrics' is deprecated.
## Use 'perCellQCMetrics' or 'perFeatureQCMetrics' instead.

# Create xlsx file with entries for QC thresholds
df <- data.frame(names = names(sce.list),
           lower_total_counts = rep(0, length(sce.list)),
           upper_total_counts = rep(0, length(sce.list)),
           lower_total_features = rep(0, length(sce.list)),
           upper_total_features = rep(0, length(sce.list)),
           lower_mito = rep(0, length(sce.list)),
           upper_mito = rep(0, length(sce.list)),
           VIM = rep(0, length(sce.list)),
           PTPRC = rep(0, length(sce.list)))

write.xlsx(df, "~/Dropbox/Postdoc/2019-12-29_BE2020/Additional_files/QC_metrics.xlsx")

saveRDS(sce.list, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_unfiltered.rds")

Removal of low-quality cells

Here we remove cells based on the filtering thresholds defined by visualizing the QC features.

QC_thresholds <- read.xlsx("~/Dropbox/Postdoc/2019-12-29_BE2020/Additional_files/QC_metrics_edited.xlsx")
# sce.list <- readRDS("~/Dropbox/Postdoc/2019-10-29_Gastric_IM///All_sce_unfiltered.rds")

for(i in 1:length(sce.list)){
  cur_sce <- sce.list[[i]]
  
  # Total counts 
  cur_sce <- cur_sce[,log10(colData(cur_sce)$total_counts) >
                       QC_thresholds[i,"lower_total_counts"] &
                       log10(colData(cur_sce)$total_counts) <
                       QC_thresholds[i,"upper_total_counts"]]
  
  # Total features 
  cur_sce <- cur_sce[,colData(cur_sce)$total_features_by_counts >
                       QC_thresholds[i,"lower_total_features"] &
                       colData(cur_sce)$total_features_by_counts <
                       QC_thresholds[i,"upper_total_features"]]
  
  # Mitochondria 
  cur_sce <- cur_sce[,(Matrix::colSums(counts(cur_sce)[rownames(cur_sce) %in%
                MT_genes$Gene.stable.ID,])/Matrix::colSums(counts(cur_sce)))*100 >
                       QC_thresholds[i,"lower_mito"] &
                       (Matrix::colSums(counts(cur_sce)[rownames(cur_sce) %in%
                MT_genes$Gene.stable.ID,])/Matrix::colSums(counts(cur_sce)))*100 <
                       QC_thresholds[i,"upper_mito"]]
  
  # VIM
  if(!is.na(QC_thresholds[i,"VIM"])){
    cur_sce <- cur_sce[,counts(cur_sce)["ENSG00000026025",] <=
                         QC_thresholds[i,"VIM"]]
  }
  
  # PTPRC
  if(!is.na(QC_thresholds[i,"PTPRC"])){
    cur_sce <- cur_sce[,counts(cur_sce)["ENSG00000081237",] <=
                         QC_thresholds[i,"PTPRC"]]
  }
  sce.list[[i]] <- cur_sce
}

saveRDS(sce.list, "~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_filtered.rds")

End Matter

To finish get session info:

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Fedora 31 (Workstation Edition)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] openxlsx_4.1.4              cowplot_1.0.0              
##  [3] Matrix_1.2-18               DropletUtils_1.6.1         
##  [5] scater_1.14.6               ggplot2_3.2.1              
##  [7] scran_1.14.6                SingleCellExperiment_1.8.0 
##  [9] SummarizedExperiment_1.16.1 DelayedArray_0.12.2        
## [11] BiocParallel_1.20.1         matrixStats_0.55.0         
## [13] Biobase_2.46.0              GenomicRanges_1.38.0       
## [15] GenomeInfoDb_1.22.0         IRanges_2.20.2             
## [17] S4Vectors_0.24.3            BiocGenerics_0.32.0        
## [19] rmarkdown_2.1              
## 
## loaded via a namespace (and not attached):
##  [1] viridis_0.5.1            edgeR_3.28.0             BiocSingular_1.2.1      
##  [4] viridisLite_0.3.0        DelayedMatrixStats_1.8.0 R.utils_2.9.2           
##  [7] assertthat_0.2.1         statmod_1.4.33           dqrng_0.2.1             
## [10] GenomeInfoDbData_1.2.2   vipor_0.4.5              yaml_2.2.1              
## [13] pillar_1.4.3             lattice_0.20-38          glue_1.3.1              
## [16] limma_3.42.2             digest_0.6.24            XVector_0.26.0          
## [19] colorspace_1.4-1         htmltools_0.4.0          R.oo_1.23.0             
## [22] pkgconfig_2.0.3          zlibbioc_1.32.0          purrr_0.3.3             
## [25] scales_1.1.0             HDF5Array_1.14.2         tibble_2.1.3            
## [28] farver_2.0.3             withr_2.1.2              lazyeval_0.2.2          
## [31] magrittr_1.5             crayon_1.3.4             evaluate_0.14           
## [34] R.methodsS3_1.8.0        beeswarm_0.2.3           tools_3.6.2             
## [37] formatR_1.7              lifecycle_0.1.0          stringr_1.4.0           
## [40] Rhdf5lib_1.8.0           munsell_0.5.0            locfit_1.5-9.1          
## [43] zip_2.0.4                irlba_2.3.3              compiler_3.6.2          
## [46] rsvd_1.0.2               rlang_0.4.4              rhdf5_2.30.1            
## [49] grid_3.6.2               RCurl_1.98-1.1           BiocNeighbors_1.4.1     
## [52] igraph_1.2.4.2           labeling_0.3             bitops_1.0-6            
## [55] gtable_0.3.0             R6_2.4.1                 gridExtra_2.3           
## [58] knitr_1.28               dplyr_0.8.4              stringi_1.4.5           
## [61] ggbeeswarm_0.6.0         Rcpp_1.0.3               tidyselect_1.0.0        
## [64] xfun_0.12